import os
import re
import shutil
import urllib2
import datetime

import source
import xml.dom.minidom 

class malwaredomainlist(source.Base):
  """Blacklist source backed by the malwaredomainlist.com XML feed.

  The feed is downloaded to ``log/blacklists/<today>/malwaredomainlist.com``
  (relative to the current working directory) and parsed on demand by the
  ``domains`` property.
  """

  def __init__(self):
    # Set to True once retrieve() has written the feed to self.filename.
    self.retrieved = False

    self.url = "http://www.malwaredomainlist.com/hostslist/mdl.xml"
    # The date is fixed at construction time, so one instance always uses
    # the same per-day directory.
    self.dir = os.path.join('log', 'blacklists', str(datetime.date.today()))
    self.filename = os.path.join(self.dir, 'malwaredomainlist.com')

    # Dotted-quad IPv4 address. Raw string so the backslashes reach the
    # regex engine untouched.
    self.patterns = {"ip": r"(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)"}

  @property
  def name(self):
    """Return the human-readable name of this source."""
    return "malwaredomainlist.com"

  def retrieve(self):
    """Download the feed from ``self.url`` into ``self.filename``.

    Creates ``self.dir`` if needed and sets ``self.retrieved`` on success.

    Raises:
      OSError: if the target directory cannot be created.
      urllib2.URLError: if the feed cannot be fetched.
      IOError: if the local file cannot be written.
    """
    try:
      os.makedirs(self.dir)
    except OSError:
      # An already-existing directory (possibly created concurrently by
      # another process) is fine; anything else is a real failure.
      if not os.path.isdir(self.dir):
        raise

    response = urllib2.urlopen(self.url)
    try:
      with open(self.filename, 'wb') as f:
        shutil.copyfileobj(response, f)
    finally:
      # urllib2 responses are not context managers; close explicitly so
      # the socket is released even if the copy fails.
      response.close()

    self.retrieved = True

  @staticmethod
  def _first_text(node, tag):
    """Return the data of the first child of the first ``tag`` element
    under ``node``, or None when the element is missing, empty, or its
    first child carries no character data."""
    elements = node.getElementsByTagName(tag)
    if not elements or not elements[0].childNodes:
      return None
    return getattr(elements[0].childNodes[0], "data", None)

  @property
  def domains(self):
    """Generate one dict per feed ``<item>`` that is not a raw IP.

    Downloads the feed first if this instance has not done so yet. Because
    this is a generator, that work happens on the first iteration step, not
    on attribute access.

    Yields:
      dict with keys ``"domain"`` (the item title minus its last
      space-separated token), ``"label"`` (the text following
      ``Description:`` in the item description, or ``""`` when that marker
      is absent) and ``"info"`` (the full item description text).
    """
    if not self.retrieved:
      self.retrieve()

    # NOTE(review): xml.dom.minidom is not hardened against maliciously
    # constructed XML; the feed comes from a remote host over plain HTTP.
    # Binary mode matches how the file was written and lets the parser
    # handle the encoding declaration itself.
    with open(self.filename, 'rb') as f:
      dom = xml.dom.minidom.parse(f)
    # The document is fully in memory now, so the file is closed before
    # iteration starts and is not held open by a suspended generator.

    # Compile once instead of on every item.
    ip_re = re.compile(self.patterns["ip"])
    description_re = re.compile(r"^.*Description:\s+(.*)$")

    for item in dom.getElementsByTagName("item"):
      raw_title = self._first_text(item, "title")
      if raw_title is None:
        # Without a title there is no domain to report; skip the item
        # rather than aborting the whole feed.
        continue
      info = self._first_text(item, "description") or ""

      # Drop the trailing space-separated token of the title (presumably a
      # timestamp suffix -- confirm against the feed format).
      title = raw_title.rsplit(' ', 1)[0]

      # Skip entries whose title starts with an IPv4 address. The match is
      # intentionally not end-anchored, so "1.2.3.4/path" is skipped too.
      if ip_re.match(title):
        continue

      found = description_re.search(info)
      label = found.group(1) if found else ""

      yield {"domain": title, "label": label, "info": info}
      
